###################################################
################## Section 6, Subsection 6.2
################## Shift and share decomposition
###################################################
# we use the ONA for interest rates
# the results are saved in an excel file

# libraries
import os
import pandas as pd

# paths
path_data = r'P:\ECB business areas\DGR\Staff\papouts\Research\Active\Firm Heterogeneities\Final datasets'
path_charts = r'P:\ECB business areas\DGR\Staff\papouts\Research\Active\Firm Heterogeneities\Final charts\Interest rates\shift and share decomposition'
if not os.path.isdir(path_charts):
    os.makedirs(path_charts)
 
# load data and apply filters
dt = pd.read_stata(path_data + r'\instr_lvl_dt_f.dta')
dt.rename(columns={'cntry_dbtr':'CNTRY','nace_code':'NACE_CODE','typ_instr_corr':'TYP_INSTR_CORR'}, inplace=True)
# apply filters
dt.drop(columns=['prtctn_amnt_byinstr','ona_orgnlmtrty_c_ttl', 'ona_pddbtr_c_ttl','mtrty_byloantype', 'pddbtr_byloantype','outst_amnt_byinstr','ona_dbtr'], inplace=True)
dt.drop(dt[dt.CNTRY.isin(['CY', 'EE','LT', 'LU', 'LV', 'MT', 'SI', 'SK']) == True].index, inplace=True)
dt.drop(dt[dt.NACE_CODE.isin(['']) == True].index, inplace=True)
dt.drop(dt[dt.wir_byloantype.isna() == True].index, inplace=True)
# rename
dt.rename(columns={'ona_intrate_c_ttl':'ONA_DBTR_LOAN','wir_byloantype':'W_RT_BYTYPE'}, inplace=True)

# shift and share decomposition
# interest rates on firm level
dbtr_ona = dt.groupby(['CNTRY', 'dbtr_id'])['ONA_DBTR_LOAN'].sum().reset_index().rename(columns={'ONA_DBTR_LOAN':'ona_dbtr'})
dt = dt.merge(dbtr_ona, how='left', on=['CNTRY', 'dbtr_id'])
dt['ona_dbtr_shr'] = dt['ONA_DBTR_LOAN']/dt['ona_dbtr']
dt['wir'] = dt['ona_dbtr_shr']*dt['W_RT_BYTYPE']
dbtr_ir = dt.groupby(['CNTRY', 'dbtr_id'])['wir'].sum().reset_index().rename(columns={'wir':'W_RT_DBTR'})
dt = dt.merge(dbtr_ir, how='left', on=['CNTRY', 'dbtr_id'])
dt_dbtr_lvl = dt[['CNTRY', 'dbtr_id', 'sz_f', 'NACE_CODE', 'W_RT_DBTR']].drop_duplicates()

# dataset on instrument level
wijc_sum = dt.groupby(['CNTRY','TYP_INSTR_CORR'])['ona_dbtr_shr'].sum().reset_index().rename(columns={'ona_dbtr_shr':'wijc_sum'})
wij_sum = dt.groupby(['TYP_INSTR_CORR'])['ona_dbtr_shr'].sum().reset_index().rename(columns={'ona_dbtr_shr':'wij_sum'})
dt = dt.merge(wijc_sum, how='left', on=['CNTRY','TYP_INSTR_CORR'])
dt = dt.merge(wij_sum, how='left', on=['TYP_INSTR_CORR'])
dt['wir_i'] = dt['wir']/dt['wijc_sum'] 
dt['wir_i_all'] = dt['wir']/dt['wij_sum'] 

# decompose
def shr_decomp(data, var):
    
    if var == 'sz_f' or var == 'NACE_CODE':
        
        # average interest rate by var
        r_gc = data.groupby(['CNTRY', var])['W_RT_DBTR'].mean().reset_index().rename(columns={'W_RT_DBTR':'r_gc'})
        # cell share
        # count firms by country and var
        n_gc = data.groupby(['CNTRY', var])['dbtr_id'].count().reset_index().rename(columns={'dbtr_id':'n_gc'})
        # count firms by country
        n_c = data.groupby(['CNTRY'])['dbtr_id'].count().reset_index().rename(columns={'dbtr_id':'n_c'})
        # calculate cell share
        w_gc = n_gc.merge(n_c, how='left', on=['CNTRY'])
        w_gc['w_gc'] = w_gc['n_gc']/w_gc['n_c']
        # calculate country interest rate
        wxr = r_gc.merge(w_gc, how='inner', on=['CNTRY', var])
        wxr['wxr'] = wxr['r_gc']*wxr['w_gc']
        r_c = wxr.groupby(['CNTRY'])['wxr'].sum().reset_index().rename(columns={'wxr':'r_c'})
        
        # calculate barred variables
        r_g_bar = data.groupby([var])['W_RT_DBTR'].mean().reset_index().rename(columns={'W_RT_DBTR':'r_g'})
        # cell share
        # count firms by var
        n_g = data.groupby([var])['dbtr_id'].count().reset_index().rename(columns={'dbtr_id':'n_g'})
        # count total firms 
        n = data['dbtr_id'].count()
        # calculate cell share
        w_g_bar = n_g.copy()
        w_g_bar['w_g'] = w_g_bar['n_g']/n
        # calculate interest rate by var
        wxr_bar = r_g_bar.merge(w_g_bar, how='inner', on=[var])
        wxr_bar['wxr'] = wxr_bar['r_g']*wxr_bar['w_g']
        r_bar = wxr_bar['wxr'].sum()
        
        # decomposition
        r_diff = r_c.copy()
        r_diff['diff'] = r_diff['r_c'] - r_bar
        comp = w_gc.merge(r_gc,how='inner',on=['CNTRY', var]).merge(w_g_bar,how='left',on=[var]).merge(r_g_bar,how='left',on=[var])
        comp['comp1'] = (comp['w_gc'] - comp['w_g'])*comp['r_g']
        comp['comp2'] = (comp['r_gc'] - comp['r_g'])*comp['w_g']
        comp['comp3'] = (comp['w_gc'] - comp['w_g'])*(comp['r_gc'] - comp['r_g'])
        # calculate components
        comps = comp.groupby(['CNTRY'])['comp1','comp2','comp3'].sum().reset_index()
        
        final_decomp = pd.concat([r_diff,comps],axis=1)
        
        return final_decomp
    
    else:   
        
        # use interest rates on instrument level
        # ona_dbtr_shr is w_ijc
        # average interest rate by country and var
        r_jc = data.groupby(['CNTRY', var])['wir_i'].sum().reset_index().rename(columns={'wir_i':'r_jc'})
        # cell share - weight of instruments
        n_c = data[['CNTRY','dbtr_id']].groupby(['CNTRY'])['dbtr_id'].nunique().reset_index().rename(columns={'dbtr_id':'n_c'})
        w_jc = data[['CNTRY',var,'wijc_sum']].drop_duplicates()
        w_jc = w_jc.merge(n_c,how='inner',on=['CNTRY'])
        w_jc['w_jc'] = w_jc['wijc_sum']/w_jc['n_c']
        
        # calculate average interest rate on country level
        wxr = r_jc.merge(w_jc,how='inner',on=['CNTRY', var])
        wxr['wxr'] = wxr['r_jc']*wxr['w_jc']
        r_c = wxr.groupby(['CNTRY'])['wxr'].sum().reset_index().rename(columns={'wxr':'r_c'})
        
        # calculate barred variables
        # average interest rate by var
        r_j_bar = data.groupby([var])['wir_i_all'].sum().reset_index().rename(columns={'wir_i_all':'r_j'})
        # cell share
        # number of firms
        n = data[['dbtr_id']].nunique()[0]
        w_j_bar = data.groupby([var])['ona_dbtr_shr'].sum().div(n).reset_index().rename(columns={'ona_dbtr_shr':'w_j'})
        # calculate interest rate by var
        wxr_bar = r_j_bar.merge(w_j_bar, how='inner', on=[var])
        wxr_bar['wxr'] = wxr_bar['r_j']*wxr_bar['w_j']
        r_bar = wxr_bar['wxr'].sum()
                
        # decomposition
        r_diff = r_c.copy()
        r_diff['diff'] = r_diff['r_c'] - r_bar
        comp = w_jc.merge(r_jc,how='inner',on=['CNTRY', var]).merge(w_j_bar,how='left',on=[var]).merge(r_j_bar,how='left',on=[var])
        comp['comp1'] = (comp['w_jc'] - comp['w_j'])*comp['r_j']
        comp['comp2'] = (comp['r_jc'] - comp['r_j'])*comp['w_j']
        comp['comp3'] = (comp['w_jc'] - comp['w_j'])*(comp['r_jc'] - comp['r_j'])
        # calculate components
        comps = comp.groupby(['CNTRY'])['comp1','comp2','comp3'].sum().reset_index()
        
        final_decomp = pd.concat([r_diff,comps],axis=1)
        
        return final_decomp, comp, w_j_bar, n
        


# calculate  
final_decomp_sz = shr_decomp(data=dt_dbtr_lvl, var='sz_f')    
final_decomp_sctr = shr_decomp(data=dt_dbtr_lvl, var='NACE_CODE')    
final_decomp_instr = shr_decomp(data=dt, var='TYP_INSTR_CORR')  

# save results
writer = pd.ExcelWriter(path_charts + r'\share_decomp.xlsx', engine='openpyxl')
final_decomp_sz.to_excel(writer, sheet_name='size')
final_decomp_sctr.to_excel(writer, sheet_name='sector')
final_decomp_instr[0].to_excel(writer, sheet_name='instrument')
writer.save()



